Human

human <- read.delim("~/Desktop/MISO_proj/EuropeanDataset/data/Sample_Metadata_Human_Heart-Brain-Only.tsv",sep="\t",header=TRUE,stringsAsFactors = FALSE)
human <- human[order(human$Source.Name),]
human$Stage = sapply(human$Characteristics.developmental.stage.,function(y){ifelse(grepl("post conception",y),"embryo",y)})
rna <- read.delim("~/Desktop/faster_Datastore/Alisha/MISO_project/EuropeanDataset/human/RNAseq/salmon_gene.matrix.tsv",sep="\t", header = T,row.names = 1,check.names = FALSE)
rna <- rna[,order(colnames(rna))]

rms <- rowMeans(rna)
rna <- rna[which(rms > 0),]
rna <- apply(rna,2,round)
Female
kable(table(human$Stage[which(human$Characteristics.sex.=="female")],human$Characteristics.organism.part.[which(human$Characteristics.sex.=="female")])) %>% kable_styling()
forebrain heart hindbrain
embryo 11 14 14
infant 0 2 1
neonate 2 1 4
school age child 0 0 1
toddler 1 1 1
young adult 0 0 1
Male
kable(table(human$Stage[which(human$Characteristics.sex.=="male")],human$Characteristics.organism.part.[which(human$Characteristics.sex.=="male")])) %>% kable_styling()
forebrain heart hindbrain
adolescent 4 1 4
elderly 2 0 2
embryo 21 24 19
infant 2 2 2
middle adult 2 1 2
neonate 1 2 1
school age child 2 0 2
toddler 2 1 1
young adult 5 1 4

Brain and Heart Together

rna.vst <- vst(rna)

pc <- prcomp(t(rna.vst))
pca <- as.data.frame(pc$x)
plot(pca[,2],pca[,1],col=rainbow(length(unique(human$Characteristics.organism.part.)))[as.factor(human$Characteristics.organism.part.)],pch=c(15,16,4,17,18,8,9,11,3)[as.factor(human$Stage)], cex = 3,main = "Human PC1 vs PC2\n Calculated Using RNA-seq",xlab="PC2",ylab="PC1")

legend("topleft",c("forebrain","heart","hindbrain"),col=rainbow(length(unique(human$Characteristics.organism.part.))),lty=1:1,lwd=5,cex=1.25,bty = "n")

legend("bottomleft",c("adolescent", "elderly", "embryo","infant", "middle adult", "neonate", "school age child", "toddler", "young adult"),pch=c(15,16,4,17,18,8,9,11,3),cex=1.25,bty = "n")

Heart

heart <- human[which(human$Characteristics.organism.part.=="heart"),]
heart <- heart[order(heart$Source.Name),]
h <- rna[,colnames(rna) %in% heart$Source.Name]
h.vst <- vst(h)
h.vst <- h.vst[,order(colnames(h.vst))]

# identical(heart$Source.Name,colnames(h.vst))
# [1] TRUE

pc <- prcomp(t(h.vst))
pca <- as.data.frame(pc$x)

stage_colors <- c( "#FF0000","#FF5100", "#FFA100", "#FFF200", "#BCFF00", "#6BFF00", "#006600", "#000000", "#99CC66", "#00FFD7", "#00D7FF", "#0086FF","#996633","#1B00FF", "#6B00FF", "#BC00FF", "#FF00F2", "#FF00A1", "#FF0051")
kable(table(heart$Stage,heart$Characteristics.sex.)) %>% kable_styling()
female male
adolescent 0 1
embryo 14 24
infant 2 2
middle adult 0 1
neonate 1 2
toddler 1 1
young adult 0 1
plot(pca[,1],pca[,2],col=rainbow(length(unique(heart$Characteristics.sex.)))[as.factor(heart$Characteristics.sex.)],pch=c(15,4,17,18,8,11,3)[as.factor(heart$Stage)], cex = 3,main = "Human Heart PC1 vs PC2\n Calculated Using RNA-seq\nColored by Sex",xlab="PC1",ylab="PC2")

legend("topleft",c("female","male"),col=rainbow(length(unique(heart$Characteristics.sex.))),lty=1:1,lwd=5,cex=1,bty = "n")

legend("bottomright",c("adolescent", "embryo","infant", "middle adult", "neonate", "toddler", "young adult"),pch=c(15,4,17,18,8,11,3),cex=1.5,bty = "n")

pairs(pca[,1:5],col=rainbow(length(unique(heart$Characteristics.sex.)))[as.factor(heart$Characteristics.sex.)],pch=16, main = "PC1 - PC5")

plot(pca[,1],pca[,2],col=stage_colors[as.factor(heart$Characteristics.developmental.stage.)],pch=c(4,16)[as.factor(heart$Characteristics.sex.)], cex = 2,main = "Human Heart PC1 vs PC2\n Calculated Using RNA-seq\nColored by Developmental Stage",xlab="PC1",ylab="PC2")

legend("bottomright",c("10 week post conception", "11 week post conception", "12 week post conception", "13 week post conception",
"16 week post conception", "18 week post conception", "19 week post conception", "4 week post conception", 
"5 week post conception","6 week post conception","7 week post conception", "8 week post conception", "9 week post conception", "adolescent", "elderly", "infant","middle adult", "neonate", "school age child", "toddler", "young adult"),col=stage_colors,lwd=3,cex=1.25,bty = "n",y.intersp = 0.85)

legend("topright",c("female","male"),pch=c(4,16),cex=1,bty = "n")
Association between PC1-5 and Sex
trim <- data.frame(pca[,1:5],Sex=sapply(rownames(pca),function(samp){strsplit(samp,"\\.")[[1]][5]}))

assoc <- c()
for(i in 1:5){
    p<-t.test(trim[which(trim$Sex=="Female"),i],trim[which(trim$Sex=="Male"),i],alternative = "two.sided")$p.value
    assoc <- rbind(assoc,data.frame(paste("PC",i,sep=""),p))
    
}
colnames(assoc) <- c("PC","P-value")
kable(assoc) %>% kable_styling()
PC P-value
PC1 0.8586393
PC2 0.6632116
PC3 0.5587200
PC4 0.2870217
PC5 0.0805153

Brain

brain <- human[which(human$Characteristics.organism.part.!="heart"),]
brain <- brain[order(brain$Source.Name),]
b <- rna[,colnames(rna) %in% brain$Source.Name]
b.vst <- vst(b)
b.vst <- b.vst[,order(colnames(b.vst))]


# identical(brain$Source.Name,colnames(h.vst))
# [1] TRUE

pc <- prcomp(t(b.vst))
pca <- as.data.frame(pc$x)
Female
kable(table(brain$Stage[which(brain$Characteristics.sex.=="female")],brain$Characteristics.organism.part.[which(brain$Characteristics.sex.=="female")])) %>% kable_styling()
forebrain hindbrain
embryo 11 14
infant 0 1
neonate 2 4
school age child 0 1
toddler 1 1
young adult 0 1
Male
kable(table(brain$Stage[which(brain$Characteristics.sex.=="male")],brain$Characteristics.organism.part.[which(brain$Characteristics.sex.=="male")])) %>% kable_styling()
forebrain hindbrain
adolescent 4 4
elderly 2 2
embryo 21 19
infant 2 2
middle adult 2 2
neonate 1 1
school age child 2 2
toddler 2 1
young adult 5 4
plot(pca[,1],pca[,2],col=rainbow(length(unique(brain$Characteristics.sex.)))[as.factor(brain$Characteristics.sex.)],pch=c(15,16,4,17,18,8,9,11,3)[as.factor(brain$Stage)], cex = 3,main = "Human Brain PC1 vs PC2\n Calculated Using RNA-seq\nColored by Sex",xlab="PC1",ylab="PC2")

legend("topleft",c("female","male"),col=rainbow(length(unique(brain$Characteristics.sex.))),lty=1:1,lwd=5,cex=1.5,bty = "n")

legend("bottomleft",c("adolescent", "elderly", "embryo","infant", "middle adult", "neonate", "school age child", "toddler", "young adult"),pch=c(15,16,4,17,18,8,9,11,3),cex=1.25,bty = "n")

pairs(pca[,1:5],col=rainbow(length(unique(brain$Characteristics.sex.)))[as.factor(brain$Characteristics.sex.)],pch=16, main = "PC1 - PC5")
Association between PC1-5 and Sex
trim <- data.frame(pca[,1:5],Sex=sapply(rownames(pca),function(samp){strsplit(samp,"\\.")[[1]][5]}))

assoc <- c()
for(i in 1:5){
    p<-t.test(trim[which(trim$Sex=="Female"),i],trim[which(trim$Sex=="Male"),i],alternative = "two.sided")$p.value
    assoc <- rbind(assoc,data.frame(paste("PC",i,sep=""),p))
    
}
colnames(assoc) <- c("PC","P-value")
kable(assoc) %>% kable_styling()
PC P-value
PC1 0.0094184
PC2 0.8183489
PC3 0.1072408
PC4 0.6007109
PC5 0.7569046
plot(pca[,1],pca[,2],col=rainbow(length(unique(brain$Characteristics.organism.part.)))[as.factor(brain$Characteristics.organism.part.)],pch=c(15,16,4,17,18,8,9,11,3)[as.factor(brain$Stage)], cex = 3,main = "Human Brain PC1 vs PC2\n Calculated Using RNA-seq\nColored by Brain Tissue",xlab="PC1",ylab="PC2")

legend("topleft",c("forebrain","hindbrain"),col=rainbow(length(unique(brain$Characteristics.organism.part.))),lty=1:1,lwd=5,cex=1.5,bty = "n")

legend("bottomleft",c("adolescent", "elderly", "embryo","infant", "middle adult", "neonate", "school age child", "toddler", "young adult"),pch=c(15,16,4,17,18,8,9,11,3),cex=1.25,bty = "n")
plot(pca[,1],pca[,2],col=stage_colors[as.factor(brain$Characteristics.developmental.stage.)],pch=c(4,16)[as.factor(brain$Characteristics.sex.)], cex = 2,main = "Human Brain PC1 vs PC2\n Calculated Using RNA-seq\nColored by Stage",xlab="PC1",ylab="PC2")

legend("topleft",c("10 week post conception", "11 week post conception", "12 week post conception", "13 week post conception",
"16 week post conception", "18 week post conception", "19 week post conception", "4 week post conception", 
"5 week post conception","6 week post conception","7 week post conception", "8 week post conception", "9 week post conception", "adolescent", "elderly", "infant","middle adult", "neonate", "school age child", "toddler", "young adult"),col=stage_colors,lwd=3,cex=1,bty = "n",y.intersp = 0.7)

legend("bottomleft",c("female","male"),pch=c(4,16),cex=1.25,bty = "n")

Mouse

mouse <- read.delim("~/Desktop/MISO_proj/EuropeanDataset/data/Sample_Metadata_Mouse_Heart-Brain-Only.tsv",sep="\t",header=TRUE,stringsAsFactors = FALSE)
mouse <- mouse[order(mouse$Source.Name),]
rna <- read.delim("~/Desktop/faster_Datastore/Alisha/MISO_project/EuropeanDataset/mouse/RNAseq/salmon_gene.matrix.tsv",sep="\t", header = T,row.names = 1,check.names = FALSE)
rna <- rna[,order(colnames(rna))]

rms <- rowMeans(rna)
rna <- rna[which(rms > 0),]
rna <- apply(rna,2,round)
Female
kable(table(mouse$Characteristics.developmental.stage.[which(mouse$Characteristics.sex.=="female")],mouse$Characteristics.organism.part.[which(mouse$Characteristics.sex.=="female")])) %>% kable_styling()
brain forebrain heart hindbrain
embryo 6 12 17 12
postnatal 0 10 10 9
Male
kable(table(mouse$Characteristics.developmental.stage.[which(mouse$Characteristics.sex.=="male")],mouse$Characteristics.organism.part.[which(mouse$Characteristics.sex.=="male")])) %>% kable_styling()
brain forebrain heart hindbrain
embryo 5 12 18 12
postnatal 0 10 10 10

Brain and Heart Together

rna.vst <- vst(rna)

pc <- prcomp(t(rna.vst))
pca <- as.data.frame(pc$x)

age_colors <- c("#FF0000","#FF6D00","#FFDB00","#B6FF00","#006600","#00FF24","#000000","#00FFFF","#0092FF","#0024FF","#4900FF","#B600FF","#FF00DB","#FF006D")
plot(pca[,2],pca[,1],col=rainbow(length(unique(mouse$Characteristics.organism.part.)))[as.factor(mouse$Characteristics.organism.part.)],pch=c(4,16)[as.factor(mouse$Characteristics.developmental.stage.)], cex = 3,main = "Mouse PC1 vs PC2\n Calculated Using RNA-seq",xlab="PC2",ylab="PC1")

legend("topleft",c("brain","forebrain","heart","hindbrain"),col=rainbow(length(unique(mouse$Characteristics.organism.part.))),lty=1:1,lwd=5,cex=1.25,bty = "n")

legend("bottomleft",c("embryo","postnatal"),pch=c(4,16),cex=1.5,bty = "n")

Heart

heart <- mouse[which(mouse$Characteristics.organism.part.=="heart"),]
heart <- heart[order(heart$Source.Name),]
h <- rna[,colnames(rna) %in% heart$Source.Name]
h.vst <- vst(h)
h.vst <- h.vst[,order(colnames(h.vst))]

# identical(heart$Source.Name,colnames(h.vst))
# [1] TRUE

pc <- prcomp(t(h.vst))
pca <- as.data.frame(pc$x)
kable(table(heart$Characteristics.developmental.stage.,heart$Characteristics.sex.)) %>% kable_styling()
female male
embryo 17 18
postnatal 10 10
plot(pca[,1],pca[,2],col=rainbow(length(unique(heart$Characteristics.sex.)))[as.factor(heart$Characteristics.sex.)],pch=c(4,16)[as.factor(heart$Characteristics.developmental.stage.)], cex = 3,main = "Mouse Heart PC1 vs PC2\n Calculated Using RNA-seq\nColored by Sex",xlab="PC1",ylab="PC2")

legend("topleft",c("female","male"),col=rainbow(length(unique(heart$Characteristics.sex.))),lty=1:1,lwd=5,cex=1.5,bty = "n")

legend("bottomleft",c("embryo","postnatal"),pch=c(4,16),cex=1.5,bty = "n")

pairs(pca[,1:5],col=rainbow(length(unique(heart$Characteristics.sex.)))[as.factor(heart$Characteristics.sex.)],pch=16, main = "PC1 - PC5")
Association between PC1-5 and Sex
trim <- data.frame(pca[,1:5],Sex=sapply(rownames(pca),function(samp){strsplit(samp,"\\.")[[1]][5]}))

assoc <- c()
for(i in 1:5){
    p<-t.test(trim[which(trim$Sex=="Female"),i],trim[which(trim$Sex=="Male"),i],alternative = "two.sided")$p.value
    assoc <- rbind(assoc,data.frame(paste("PC",i,sep=""),p))
    
}
colnames(assoc) <- c("PC","P-value")
kable(assoc) %>% kable_styling()
PC P-value
PC1 0.9065778
PC2 0.7982510
PC3 0.8513815
PC4 0.7351453
PC5 0.3609661
plot(pca[,1],pca[,2],col=age_colors[as.factor(heart$Characteristics.age.)],pch=c(4,16)[as.factor(heart$Characteristics.sex.)], cex = 2,main = "Mouse Heart PC1 vs PC2\n Calculated Using RNA-seq\nColored by Age",xlab="PC1",ylab="PC2")

legend("bottomleft",c("0 days - postnatal","3 days postnatal","E10.5 days","E11.5 days","E12.5 days","E13.5 days","14 days - postnatal","E14.5 days","E15.5 days","E16.5 days","E17.5 days","E18.5 days","28 days - postnatal","63 days - postnatal"), col=age_colors,lty=1:1,lwd=5,cex=1,bty = "n")

legend("topleft",c("female","male"),pch=c(4,16),cex=1,bty = "n")

Brain

brain <- mouse[which(mouse$Characteristics.organism.part.!="heart"),]
brain <- brain[order(brain$Source.Name),]
b <- rna[,colnames(rna) %in% brain$Source.Name]
b.vst <- vst(b)
b.vst <- b.vst[,order(colnames(b.vst))]
# identical(brain$Source.Name,colnames(h.vst))
# [1] TRUE

pc <- prcomp(t(b.vst))
pca <- as.data.frame(pc$x)
Female
kable(table(brain$Characteristics.developmental.stage.[which(brain$Characteristics.sex.=="female")],brain$Characteristics.organism.part.[which(brain$Characteristics.sex.=="female")])) %>% kable_styling()
brain forebrain hindbrain
embryo 6 12 12
postnatal 0 10 9
Male
kable(table(brain$Characteristics.developmental.stage.[which(brain$Characteristics.sex.=="male")],brain$Characteristics.organism.part.[which(brain$Characteristics.sex.=="male")])) %>% kable_styling()
brain forebrain hindbrain
embryo 5 12 12
postnatal 0 10 10
plot(pca[,1],pca[,2],col=rainbow(length(unique(brain$Characteristics.sex.)))[as.factor(brain$Characteristics.sex.)],pch=c(4,16)[as.factor(brain$Characteristics.developmental.stage.)], cex = 3,main = "Mouse Brain PC1 vs PC2\n Calculated Using RNA-seq\nColored by Sex",xlab="PC1",ylab="PC2")

legend("topleft",c("female","male"),col=rainbow(length(unique(brain$Characteristics.sex.))),lty=1:1,lwd=5,cex=1.5,bty = "n")

legend("topright",c("embryo","postnatal"),pch=c(4,16),cex=1.5,bty = "n")

pairs(pca[,1:5],col=rainbow(length(unique(brain$Characteristics.sex.)))[as.factor(brain$Characteristics.sex.)],pch=16, main = "PC1 - PC5")
Association between PC1-5 and Sex
trim <- data.frame(pca[,1:5],Sex=sapply(rownames(pca),function(samp){strsplit(samp,"\\.")[[1]][5]}))

assoc <- c()
for(i in 1:5){
    p<-t.test(trim[which(trim$Sex=="Female"),i],trim[which(trim$Sex=="Male"),i],alternative = "two.sided")$p.value
    assoc <- rbind(assoc,data.frame(paste("PC",i,sep=""),p))
    
}
colnames(assoc) <- c("PC","P-value")
kable(assoc) %>% kable_styling()
PC P-value
PC1 0.9617070
PC2 0.8976413
PC3 0.9060813
PC4 0.9084086
PC5 0.9787213
plot(pca[,1],pca[,2],col=rainbow(length(unique(brain$Characteristics.organism.part.)))[as.factor(brain$Characteristics.organism.part.)],pch=c(4,16)[as.factor(brain$Characteristics.developmental.stage.)], cex = 3,main = "Mouse Brain PC1 vs PC2\n Calculated Using RNA-seq\nColored by Brain Tissue",xlab="PC1",ylab="PC2")

legend("topleft",c("brain","forebrain","hindbrain"),col=rainbow(length(unique(brain$Characteristics.organism.part.))),lty=1:1,lwd=5,cex=1.5,bty = "n")

legend("topright",c("embryo","postnatal"),pch=c(4,16),cex=1.5,bty = "n")

plot(pca[,1],pca[,2],col=age_colors[as.factor(brain$Characteristics.age.)],pch=c(4,16)[as.factor(brain$Characteristics.sex.)], cex = 2,main = "Mouse Brain PC1 vs PC2\n Calculated Using RNA-seq\nColored by Age",xlab="PC1",ylab="PC2")

legend("topright",c("0 days - postnatal","3 days postnatal","E10.5 days","E11.5 days","E12.5 days","E13.5 days","14 days - postnatal","E14.5 days","E15.5 days","E16.5 days","E17.5 days","E18.5 days","28 days - postnatal","63 days - postnatal"), col=age_colors,lty=1:1,lwd=5,cex=1,bty = "n")

legend("topleft",c("female","male"),pch=c(4,16),cex=1,bty = "n")
plot(pca[,1],pca[,2],col=age_colors[as.factor(brain$Characteristics.age.)],pch=c(16,17,8)[as.factor(brain$Characteristics.organism.part.)], cex = 2,main = "Mouse Brain PC1 vs PC2\n Calculated Using RNA-seq\nColored by Age",xlab="PC1",ylab="PC2")

legend("topright",c("0 days - postnatal","3 days postnatal","E10.5 days","E11.5 days","E12.5 days","E13.5 days","14 days - postnatal","E14.5 days","E15.5 days","E16.5 days","E17.5 days","E18.5 days","28 days - postnatal","63 days - postnatal"), col=age_colors,lty=1:1,lwd=5,cex=1,bty = "n")

legend("topleft",c("brain","forebrain","hindbrain"),pch=c(16,17,8),cex=1,bty = "n")
Heart and brain mouse samples
kable(table(mouse$Characteristics.age.,mouse$Characteristics.developmental.stage.)) %>% kable_styling()
embryo postnatal
0 0 12
3 0 12
10.5 7 0
11.5 8 0
12.5 8 0
13.5 12 0
14 0 11
14.5 11 0
15.5 12 0
16.5 12 0
17.5 12 0
18.5 12 0
28 0 12
63 0 12
kable(table(brain$Characteristics.organism.part.,brain$Characteristics.age.)) %>% kable_styling()
0 3 10.5 11.5 12.5 13.5 14 14.5 15.5 16.5 17.5 18.5 28 63
brain 0 0 3 4 4 0 0 0 0 0 0 0 0 0
forebrain 4 4 0 0 0 4 4 4 4 4 4 4 4 4
hindbrain 4 4 0 0 0 4 3 4 4 4 4 4 4 4
fore <- brain[which(brain$Characteristics.organism.part.=="forebrain"),]
hind <- brain[which(brain$Characteristics.organism.part.=="hindbrain"),]

age_colors <- c("#FF0000","#FF6D00","#00FF24","#000000","#00FFFF","#0092FF","#0024FF","#4900FF","#B600FF","#FF00DB","#FF006D")
fb <- rna[,colnames(rna) %in% fore$Source.Name]
fb.vst <- vst(fb)
## converting counts to integer mode
fb.vst <- fb.vst[,order(colnames(fb.vst))]


pc <- prcomp(t(fb.vst))
pca <- as.data.frame(pc$x)
plot(pca[,1],pca[,2],col=age_colors[as.factor(fore$Characteristics.age.)], 
     cex = 0.7,
     pch=16,
     main = "Mouse Forebrain Only\nPC1 vs PC2\nColored by Age",xlab="PC1",ylab="PC2")

text(pca[,1],pca[,2],fore$Characteristics.age.,col=age_colors[as.factor(fore$Characteristics.age.)]) 

# legend("bottomleft",c("0 days - postnatal","3 days postnatal","E10.5 days","E11.5 days","E12.5 days","E13.5 days","14 days - postnatal","E14.5 days","E15.5 days","E16.5 days","E17.5 days","E18.5 days","28 days - postnatal","63 days - postnatal"), col=age_colors,lty=1:1,lwd=5,cex=0.7,bty = "n")

# legend("topleft",c("female","male"),pch=c(4,16),cex=1,bty = "n")
hb <- rna[,colnames(rna) %in% hind$Source.Name]
hb.vst <- vst(hb)
## converting counts to integer mode
hb.vst <- hb.vst[,order(colnames(hb.vst))]


pc <- prcomp(t(hb.vst))
pca <- as.data.frame(pc$x)
plot(pca[,1],pca[,2],col=rainbow(length(unique(hind$Characteristics.age.)))[as.factor(hind$Characteristics.age.)], 
     cex = 0.7,
     pch=16,
     main = "Mouse Hindbrain Only\nPC1 vs PC2\nColored by Age",xlab="PC1",ylab="PC2")

text(pca[,1],pca[,2],hind$Characteristics.age.,col=age_colors[as.factor(hind$Characteristics.age.)]) 

# legend("bottomleft",c("0 days - postnatal","3 days postnatal","E10.5 days","E11.5 days","E12.5 days","E13.5 days","14 days - postnatal","E14.5 days","E15.5 days","E16.5 days","E17.5 days","E18.5 days","28 days - postnatal","63 days - postnatal"), col=age_colors,lty=1:1,lwd=5,cex=0.7,bty = "n")

# legend("topleft",c("female","male"),pch=c(4,16),cex=1,bty = "n")